# BigQuery SQL text. Selects repo name, file name, path, sha and
# initial-commit timestamp from `repos.file_init_commit`, plus the language
# recorded for the same sha in `analysis.lines_of_code_by_file`.
# The LEFT JOIN is followed by `loc.language IS NOT NULL`, so rows without a
# matching, non-null language are filtered out of the result.
# NOTE(review): the [project:dataset.table] bracket form looks like BigQuery
# legacy SQL -- confirm the dialect before porting this query.
# NOTE(review): the pipeline right after this keeps only `init_commit` and
# `language`; the other selected columns appear unused in the visible code.
query <- "
SELECT
commit.repo_name AS repo_name,
commit.file_name AS file_name,
commit.path AS path,
commit.sha AS sha,
commit.init_commit_timestamp AS init_commit,
loc.language AS language
FROM
[github-bioinformatics-171721:repos.file_init_commit] AS commit
LEFT JOIN
[github-bioinformatics-171721:analysis.lines_of_code_by_file] AS loc
ON
commit.sha = loc.sha
WHERE
loc.language IS NOT NULL
"
# Run `query` and reduce every returned row to two columns:
#   year     - numeric calendar year of the file's initial commit
#   language - language reported for the file
# The year is read straight off the date with format("%Y") instead of going
# through cut(breaks = "year") + substr(), which built a throwaway factor and
# could not handle an empty or all-NA result.
# NOTE(review): query_exec() / parse_iso_8601() are presumably from bigrquery
# and parsedate respectively -- confirm against the file's library() calls.
data_all_lang <- query_exec(query, project = proj, max_pages = Inf) %>%
  mutate(
    year = as.numeric(format(as.Date(parse_iso_8601(init_commit)), "%Y"))
  ) %>%
  select(year, language)
## 0 bytes processed
# Count of all files (any language) whose initial commit falls in each year;
# used as the denominator for the per-language shares.
total_files_per_year <- data_all_lang %>%
  group_by(year) %>%
  summarise(total_files_year = n())
# Per-year, per-language file counts for the languages listed in `top_langs`,
# joined to the yearly totals so each count can also be expressed as a
# fraction of all files first committed in that year.
data <- data_all_lang %>%
  filter(language %in% top_langs) %>%
  group_by(year, language) %>%
  summarise(num_files = n()) %>%
  # summarise() removes only the innermost grouping level, so the result would
  # otherwise stay grouped by year; drop that so `data` is a plain table.
  ungroup() %>%
  arrange(year) %>%
  left_join(total_files_per_year, by = "year") %>%
  mutate(
    # Fraction in [0, 1], not yet multiplied by 100.
    pct_total_files_year = num_files / total_files_year,
    # Character copy of the year; the plots use it as a discrete x axis.
    year_fmt = as.character(year)
  )
# Overview chart: for every language in `data`, its share of the files first
# committed each year (2010 onward), drawn as one coloured line per language.
ggplot(
  # Filter on the numeric `year` column; comparing the character `year_fmt`
  # against a number falls back to string comparison.
  data %>% filter(year >= 2010),
  aes(
    x = year_fmt,
    # The column holds a 0-1 fraction; scale it so the values agree with the
    # "Percentage" axis label.
    y = 100 * pct_total_files_year,
    colour = language,
    group = language
  )
) +
  geom_line() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.x = element_text(size = 18),
    axis.title.y = element_text(size = 18),
    plot.title = element_text(size = 24)
  ) +
  xlab("Year") +
  ylab("Percentage of total files created") +
  ggtitle("Language use over time") +
  scale_color_discrete("Language")

# Build a line chart of one language's yearly share of newly created files.
#
# Reads the summary table `data` from the enclosing environment.
#
# Args:
#   lang:     language name, compared with `==` against `data$language`.
#   min_year: earliest year to include; defaults to 2010, the cutoff that was
#             previously hard-coded.
#
# Returns:
#   The ggplot object; nothing is printed here.
plot_lang_pct <- function(lang, min_year = 2010) {
  # Filter on the numeric `year` column rather than the character `year_fmt`,
  # so the cutoff is a numeric comparison and not a string comparison.
  lang_data <- data %>% filter(year >= min_year, language == lang)

  ggplot(
    lang_data,
    aes(
      x = year_fmt,
      # Stored as a 0-1 fraction; scale to match the "Percentage" label.
      y = 100 * pct_total_files_year,
      group = language
    )
  ) +
    geom_line() +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1),
      axis.title.x = element_text(size = 18),
      axis.title.y = element_text(size = 18),
      plot.title = element_text(size = 24)
    ) +
    xlab("Year") +
    ylab("Percentage of total files created") +
    ggtitle(paste("Percentage of total files created:", lang))
}
# Build a line chart of the number of files created per year for one language.
#
# Reads the summary table `data` from the enclosing environment.
#
# Args:
#   lang:     language name, compared with `==` against `data$language`.
#   min_year: earliest year to include; defaults to 2010, the cutoff that was
#             previously hard-coded.
#
# Returns:
#   The ggplot object; nothing is printed here.
plot_lang_total <- function(lang, min_year = 2010) {
  # Filter on the numeric `year` column rather than the character `year_fmt`,
  # so the cutoff is a numeric comparison and not a string comparison.
  lang_data <- data %>% filter(year >= min_year, language == lang)

  ggplot(
    lang_data,
    aes(
      x = year_fmt,
      y = num_files,
      group = language
    )
  ) +
    geom_line() +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1),
      axis.title.x = element_text(size = 18),
      axis.title.y = element_text(size = 18),
      plot.title = element_text(size = 24)
    ) +
    xlab("Year") +
    ylab("Total files created") +
    ggtitle(paste("Number of files created:", lang))
}
# Render both charts for every language in `top_langs`: the raw count first,
# then the share. print() is required because ggplot objects are not
# auto-printed inside a loop.
for (lang in top_langs) {
  print(plot_lang_total(lang))
  print(plot_lang_pct(lang))
}























